# Resolve the project root once; it is handed to knitr below.
root.dir <- here::here()
# Chunk-level options. `dpi` is a chunk option, so it has to be set here:
# passing it to `opts_knit` has no effect on the rendered figures.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  dpi = 300
  # fig.height = 12,
  # fig.width = 10
)
# Package-level options. `root.dir` is a knit option (not a chunk option), so
# it is set only here.
knitr::opts_knit$set(root.dir = root.dir)

library(dplyr)
library(ggplot2)
library(Matrix)
set.seed(2020)
source("tfidf.R")
source("utils.R")

# Named colour vector keyed by "phenotype" / "celltype".
color_dict <- setNames(
  c("purple", "turquoise"),
  c("phenotype", "celltype")
)

Human Phenotype Ontology

Data

# Download the phenotype-to-gene annotation table from the HPO CI server.
hpo_annot_url <- "https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt"
HPO <- data.table::fread(hpo_annot_url, nThread = 10)
#> Warning in data.table::fread("https://ci.monarchinitiative.org/view/hpo/job/
#> hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/
#> phenotype_to_genes.txt", : Detected 1 column names but the data has 7 columns
#> (i.e. invalid file). Added 6 extra default column names at the end.

# The real header is packed into the first column name: drop the "#Format: "
# prefix, split on the literal "<tab>" token, then replace "-" and " " with "_".
header_line <- gsub("#Format: ", "", colnames(HPO)[1])
header_fields <- stringr::str_split(header_line, "<tab>")[[1]]
colnames(HPO) <- gsub("-|[ ]", "_", header_fields)
# Keep the original ID in `HPOid`; `HPO_id` gets every ":" replaced by ".".
HPO <- dplyr::mutate(
  HPO,
  HPOid = HPO_id,
  HPO_id = gsub("[:]", ".", HPO_id)
)

Genes / term

# Count the distinct gene symbols per HPO term and sort with the largest first.
hpo_by_term <- dplyr::group_by(HPO, HPO_id, HPO_label)
genes_per_term <- dplyr::summarise(
  hpo_by_term,
  genes = dplyr::n_distinct(entrez_gene_symbol)
)
gene_counts <- dplyr::arrange(genes_per_term, dplyr::desc(genes))
#> `summarise()` has grouped output by 'HPO_id'. You can override using the `.groups` argument.
print(gene_counts)
#> # A tibble: 9,595 x 3
#> # Groups:   HPO_id [9,595]
#>    HPO_id     HPO_label                                 genes
#>    <chr>      <chr>                                     <int>
#>  1 HP.0000707 Abnormality of the nervous system          3463
#>  2 HP.0033127 Abnormality of the musculoskeletal system  3369
#>  3 HP.0012638 Abnormal nervous system physiology         3239
#>  4 HP.0000152 Abnormality of head or neck                2878
#>  5 HP.0000234 Abnormality of the head                    2855
#>  6 HP.0000924 Abnormality of the skeletal system         2835
#>  7 HP.0000478 Abnormality of the eye                     2747
#>  8 HP.0011842 Abnormality of skeletal morphology         2726
#>  9 HP.0012639 Abnormal nervous system morphology         2688
#> 10 HP.0000007 Autosomal recessive inheritance            2658
#> # … with 9,585 more rows

dim(subset(gene_counts, genes==1))
#> [1] 1873    3
# Keep the IDs of terms with at least four genes and report how many were kept.
keep_rows <- which(gene_counts$genes >= 4)
select_ids <- gene_counts$HPO_id[keep_rows]
n_terms <- n_distinct(gene_counts$HPO_id)
print(paste(length(select_ids), "/", n_terms, "selected."))
#> [1] "6163 / 9595 selected."

hist(gene_counts$genes, breaks = 100)  

Create matrix

# Build a sparse gene x term indicator matrix for the selected HPO terms:
# rows are gene symbols, columns are `HPO_id`s, and absent pairs are filled with 0.
HPO$val <- 1
hpo_selected <- subset(HPO, HPO_id %in% select_ids)
mat_wide <- data.table::dcast.data.table(
  hpo_selected,
  formula = entrez_gene_symbol ~ HPO_id,
  value.var = "val",
  fill = 0,
  fun.aggregate = max
)
# Move the gene symbols from a column into the row names.
mat_df <- tibble::column_to_rownames(
  data.frame(mat_wide),
  var = "entrez_gene_symbol"
)
mat <- as(as.matrix(data.frame(mat_df)), "sparseMatrix")


## Specificity matrix
library(Matrix)
# Scale every column (term) of `mat` by the reciprocal of its column sum.
normalised_meanExp <- t(t(mat) * (1 / Matrix::colSums(mat)))
# Divide every row (gene) by its row total. The small constant guards against
# division by zero. `Matrix::rowSums` is used instead of `apply(..., 1, sum)`
# so the sparse matrix is not coerced to a dense one.
spec_eps <- 1e-12
mat_spec <- normalised_meanExp / (Matrix::rowSums(normalised_meanExp) + spec_eps)

Metadata

Ancestors

Use ontologyIndex to annotate terms further.

library(ontologyIndex)
data("hpo")

# Build `ancest_df` only if it does not already exist in the session: one row
# per HPO term, holding the ID and label of the 2nd and 3rd entries of the
# term's ancestor vector (stored as ancestor level 1 and level 2).
if (!exists("ancest_df")) {
  hpo_ids <- unique(HPO$HPOid)
  # Look up each term's ancestors a single time. A term whose lookup raises an
  # error yields NULL and is dropped by unlist() below.
  ancestors <- lapply(hpo_ids, function(x) {
    tryCatch(
      get_term_property(
        ontology = hpo, property = "ancestors", term = x, as_names = TRUE
      ),
      error = function(e) NULL
    )
  })
  names(ancestors) <- hpo_ids
  # Indexing NULL returns NULL, so failed lookups stay NULL here.
  ancest_1 <- lapply(ancestors, function(terms) terms[2])
  ancest_2 <- lapply(ancestors, function(terms) terms[3])

  # unlist() names each value "<term id>.<ancestor id>"; those names become
  # the row names, which are then split back into two columns on the ".".
  ancest_df <- rbind(
    data.frame(ancestor_label = unlist(ancest_1), ancestor_lvl = 1),
    data.frame(ancestor_label = unlist(ancest_2), ancestor_lvl = 2)
  ) %>%
    tibble::rownames_to_column(var = "id") %>%
    tidyr::separate(col = "id", sep = "[.]", into = c("HPOid", "ancestor_id")) %>%
    data.table::data.table() %>%
    data.table::melt.data.table(
      id.vars = c("HPOid", "ancestor_lvl"),
      measure.vars = c("ancestor_id", "ancestor_label")
    ) %>%
    # Suffix the variable name with the level: ancestor_id1, ancestor_label2, ...
    dplyr::mutate(variable = paste0(variable, ancestor_lvl)) %>%
    data.table::dcast.data.table(formula = HPOid ~ variable, value.var = "value")

  # Value of the if-block: the number of terms that received an annotation.
  n_distinct(ancest_df$HPOid)
}
#> [1] 8383

Merge metadata

Create metadata table for each term.

# One row per HPO term: both ID spellings and the label, left-joined with the
# ancestor annotations (terms without ancestors are kept) and joined with the
# per-term gene counts.
# parents <- data.table::fread("TermsToUse.csv")
term_ids <- unique(HPO[, c("HPOid", "HPO_id", "HPO_label")])
meta <- term_ids %>%
  # merge(parents, by.x = "HPO_label", by.y = "HPOterm") %>%
  merge(ancest_df, by = "HPOid", all.x = TRUE) %>%
  merge(gene_counts[, c("HPO_id", "genes")], by = "HPO_id")
meta
#>           HPO_id      HPOid                                        HPO_label
#>    1: HP.0000002 HP:0000002                       Abnormality of body height
#>    2: HP.0000003 HP:0000003                     Multicystic kidney dysplasia
#>    3: HP.0000006 HP:0000006                   Autosomal dominant inheritance
#>    4: HP.0000007 HP:0000007                  Autosomal recessive inheritance
#>    5: HP.0000008 HP:0000008 Abnormal morphology of female internal genitalia
#>   ---                                                                       
#> 9591: HP.3000050 HP:3000050                   Abnormality of odontoid tissue
#> 9592: HP.3000062 HP:3000062      Abnormal internal carotid artery morphology
#> 9593: HP.3000072 HP:3000072 Abnormal levator palpebrae superioris morphology
#> 9594: HP.3000075 HP:3000075                Abnormal lingual nerve morphology
#> 9595: HP.3000077 HP:3000077    Abnormal mandible condylar process morphology
#>       ancestor_id1 ancestor_id2        ancestor_label1
#>    1:   HP:0000118   HP:0001507 Phenotypic abnormality
#>    2:   HP:0000118   HP:0000119 Phenotypic abnormality
#>    3:   HP:0000005   HP:0000006    Mode of inheritance
#>    4:   HP:0000005   HP:0000007    Mode of inheritance
#>    5:   HP:0000118   HP:0000119 Phenotypic abnormality
#>   ---                                                 
#> 9591:   HP:0000118   HP:0000924 Phenotypic abnormality
#> 9592:   HP:0000118   HP:0001626 Phenotypic abnormality
#> 9593:   HP:0000118   HP:0000152 Phenotypic abnormality
#> 9594:   HP:0000118   HP:0000152 Phenotypic abnormality
#> 9595:   HP:0000118   HP:0000152 Phenotypic abnormality
#>                                ancestor_label2 genes
#>    1:                       Growth abnormality  1354
#>    2:  Abnormality of the genitourinary system   109
#>    3:           Autosomal dominant inheritance  1787
#>    4:          Autosomal recessive inheritance  2658
#>    5:  Abnormality of the genitourinary system   439
#>   ---                                               
#> 9591:       Abnormality of the skeletal system   177
#> 9592: Abnormality of the cardiovascular system     9
#> 9593:              Abnormality of head or neck     2
#> 9594:              Abnormality of head or neck     1
#> 9595:              Abnormality of head or neck     3
 

# Write the per-term metadata (without the dotted `HPO_id`), ordered by gene
# count. Create the output directory first so the write does not fail when
# it is missing.
dir.create("DimReduction", showWarnings = FALSE, recursive = TRUE)
meta %>%
  dplyr::select(-HPO_id) %>%
  dplyr::arrange(genes) %>%
  data.table::fwrite("DimReduction/HPO_metadata.csv")

Tabula Muris

library(EWCE)
#> Warning: replacing previous import 'biomaRt::select' by 'dplyr::select' when
#> loading 'EWCE'
#> Warning: replacing previous import 'dplyr::combine' by 'gridExtra::combine' when
#> loading 'EWCE'
#> Warning: replacing previous import 'Matrix::cov2cor' by 'stats::cov2cor' when
#> loading 'EWCE'
#> Warning: replacing previous import 'SingleCellExperiment::weights' by
#> 'stats::weights' when loading 'EWCE'
#> Warning: replacing previous import 'dplyr::filter' by 'stats::filter' when
#> loading 'EWCE'
#> Warning: replacing previous import 'dplyr::lag' by 'stats::lag' when loading
#> 'EWCE'
#> Warning: replacing previous import 'Matrix::toeplitz' by 'stats::toeplitz' when
#> loading 'EWCE'
#> Warning: replacing previous import 'Matrix::update' by 'stats::update' when
#> loading 'EWCE'
#> Warning: replacing previous import 'Matrix::tail' by 'utils::tail' when loading
#> 'EWCE'
#> Warning: replacing previous import 'Matrix::head' by 'utils::head' when loading
#> 'EWCE'
# Load the `CTD_meta` dataset; try() lets the script continue if that fails.
try(data("CTD_meta"))
# CTD_meta <- readxl::read_excel("~/projects/model_celltype_conservation/CTD_metadata.xlsx")
# Read the Tabula Muris CTD from the URL recorded in `CTD_meta`.
tm_url <- subset(CTD_meta, dataset == "TabulaMuris")$url
ctd_tm <- readRDS(url(tm_url))

Binarize matrix

# Binarise level 2 of the Tabula Muris CTD, keeping the top 2 quantiles per
# cell type and setting non-zero values to 1 (as reported in the output below).
# NOTE(review): binarize_ctd() is not defined in this file; presumably it comes
# from the sourced utils.R. Confirm.
mat_tm <- binarize_ctd(
  ctd_tm,
  level = 2,
  top_quantiles = 2,
  # top_genes = 100,
  replace_nonzeros = TRUE
)
#> [1] "Selected top 2 quantiles per celltype."
#> [1] "Replacing non-zero values with 1."
dim(mat_tm) 
#> [1] 15854   120

LaManno2020

# Read the LaManno2020 CTD from the URL recorded in `CTD_meta`.
lamanno_url <- subset(CTD_meta, dataset == "LaManno2020")$url
ctd_lamanno <- readRDS(url(lamanno_url))

Binarize matrix

# Binarise level 4 of the LaManno2020 CTD, keeping the top 4 quantiles per
# cell type and setting non-zero values to 1 (as reported in the output below).
mat_lamanno <- binarize_ctd(
  ctd_lamanno,
  level = 4,
  top_quantiles = 4,
  replace_nonzeros = TRUE
)
#> [1] "Selected top 4 quantiles per celltype."
#> [1] "Replacing non-zero values with 1."
dim(mat_lamanno) 
#> [1] 16052   528

Descartes

# Read the Descartes (sampled data) CTD from the URL recorded in `CTD_meta`.
descartes_url <- subset(CTD_meta, dataset == "descartes_SampledData")$url
ctd_descartes <- readRDS(url(descartes_url))

Binarize matrix

# Binarise level 2 of the Descartes CTD, keeping the top 100 genes per cell
# type and setting non-zero values to 1 (as reported in the output below).
mat_desc <- binarize_ctd(
  ctd_descartes,
  level = 2,
  top_genes = 100,
  replace_nonzeros = TRUE
)
#> [1] "Selected top 100 genes per celltype."
#> [1] "Replacing non-zero values with 1."
dim(mat_desc)
#> [1] 56630   172

Merge data

Merge CTD

library(Matrix.utils)

# Join the Tabula Muris and LaManno matrices on their row names. With both
# `all.x` and `all.y` FALSE, only rows present in both inputs are kept.
mat_ctd <- Matrix.utils::merge.Matrix(
  mat_tm, mat_lamanno,
  by.x = row.names(mat_tm),
  by.y = row.names(mat_lamanno),
  all.x = FALSE, all.y = FALSE
)
# mat_ctd <- mat_tm

# Split each column name on "." into species / dataset / celltype. The original
# `id` column is kept, and any further "." stays inside `celltype`
# (extra = "merge").
meta_ctd <- data.frame(id = colnames(mat_ctd)) %>%
  tidyr::separate(
    col = "id", remove = FALSE, sep = "[.]",
    into = c("species", "dataset", "celltype"),
    extra = "merge"
  ) %>%
  data.table::data.table()

Merge the HPO and CTD matrices.

# Join the HPO matrix (rows named by gene symbol) with the CTD matrix on row
# names. Only rows found in both are kept (all.x / all.y are FALSE).
# NOTE(review): this assumes the CTD row names are gene symbols in the same
# nomenclature as the HPO ones. Confirm.
MAT <- Matrix.utils::merge.Matrix(
  mat, mat_ctd,
  by.x = row.names(mat),
  by.y = row.names(mat_ctd),
  all.x = FALSE, all.y = FALSE
)
dim(MAT)
#> [1] 3958 6811
# MAT[is.na(MAT)] <- 0
# MAT[MAT!=0] <-1



# One metadata row per column of MAT. Columns matched in `meta` (non-missing
# `HPOid`) are tagged "phenotype"; everything else is tagged "celltype".
merged_meta <- data.table::data.table(id = colnames(MAT)) %>%
  merge(meta, by.x = "id", by.y = "HPO_id", all.x = TRUE) %>%
  merge(meta_ctd, by = "id", all.x = TRUE) %>%
  dplyr::mutate(
    type = factor(
      ifelse(is.na(HPOid), "celltype", "phenotype"),
      levels = c("phenotype", "celltype"),
      ordered = TRUE
    )
  ) %>%
  # Fill in the label / species / dataset fields for the phenotype rows.
  dplyr::mutate(
    label = ifelse(is.na(HPO_label), celltype, HPO_label),
    # genes=ifelse(is.na(genes), n_genes[id], genes),
    species = ifelse(type == "phenotype", "human", species),
    dataset = ifelse(type == "phenotype", "HPO", dataset)
  )

Count genes

# Column sums of MAT (the number of genes per term / cell type, given the
# binary entries), sorted in decreasing order.
n_genes <- sort(DelayedArray::colSums(MAT, na.rm = TRUE), decreasing = TRUE)
# hist(n_genes, breaks = 50)
# Attach metadata to the non-empty columns. The `genes` column already in
# `merged_meta` is dropped so the freshly computed counts are the ones used.
gene_df <- data.frame(genes = n_genes[n_genes > 0]) %>%
  tibble::rownames_to_column("id") %>%
  merge(merged_meta %>% dplyr::select(-genes), by = "id") %>%
  data.table::data.table()

# Histogram of genes per column, one panel per dataset. The formula is passed
# positionally because the `facets=` argument name is deprecated in ggplot2.
ggplot(gene_df, aes(fill = dataset, x = genes)) +
  geom_histogram(bins = 50) +
  facet_grid(dataset ~ ., scales = "free") +
  theme_bw()


# Overwrite the HPO-derived counts with the counts from the merged matrix.
merged_meta$genes <- n_genes[merged_meta$id]

UMAP

Run UMAP

  • Computing Hamming distances should, in theory, be better suited to binary data, but Euclidean distance sometimes produces better inter-group mixing.

The Python implementation of UMAP has more distance options.

# Transpose MAT so that each of its columns (term / cell type) becomes one row
# to embed, and ask uwot to also return the model, neighbours and fuzzy graph.
umap_input <- t(as.matrix(MAT))
umap_res <- uwot::umap(
  X = umap_input,
  n_components = 2,
  ret_extra = c("model", "nn", "fgraph"),
  # metric = "hamming",
  verbose = TRUE
)
#> 01:50:52 UMAP embedding parameters a = 1.896 b = 0.8006
#> 01:50:54 Read 6811 rows and found 3958 numeric columns
#> 01:50:54 Using Annoy for neighbor search, n_neighbors = 15
#> 01:50:54 Building Annoy index with metric = euclidean, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 01:51:00 Writing NN index file to temp file /var/folders/zq/h7mtybc533b1qzkys_ttgpth0000gn/T//RtmpM1lLtl/file12bd7eea43a2
#> 01:51:00 Searching Annoy index using 6 threads, search_k = 1500
#> 01:51:24 Annoy recall = 100%
#> 01:51:24 Commencing smooth kNN distance calibration using 6 threads
#> 01:51:24 572 smooth knn distance failures
#> 01:51:25 Initializing from normalized Laplacian + noise
#> 01:51:25 Commencing optimization for 500 epochs, with 165276 positive edges
#> 01:51:33 Optimization finished

Plot

# Name the embedding columns UMAP_1..UMAP_k, tag each row with the matching
# column name of MAT, and left-join the merged metadata.
# seq_len() is used so a zero-column embedding cannot produce the 1:0 sequence.
embed <- umap_res$embedding %>%
  `colnames<-`(paste0("UMAP_", seq_len(ncol(umap_res$embedding)))) %>%
  data.table::data.table() %>%
  dplyr::mutate(id = colnames(MAT)) %>%
  merge(merged_meta, by = "id", all.x = TRUE)


# Scatter plot of the UMAP embedding. The aesthetics that ggplot2 itself does
# not use (label, ancestor_label1/2) are mapped as well.
# NOTE(review): presumably these are there so they show up in the plotly
# hover text. Confirm.
umap_aes <- aes(
  x = UMAP_1, y = UMAP_2,
  color = dataset, shape = type,
  label = label, size = genes,
  ancestor_label1 = ancestor_label1,
  ancestor_label2 = ancestor_label2
)
gg_umap <- ggplot(embed, umap_aes) +
  geom_point(alpha = .5) +
  # scale_color_manual(values = color_dict) + 
  scale_color_manual(values = unname(pals::alphabet())) +
  theme_bw()
plotly::ggplotly(gg_umap)
#> Warning: Using shapes for an ordinal variable is not advised

Cluster

library(Seurat)
#> Registered S3 method overwritten by 'spatstat':
#>   method     from
#>   print.boxx cli
#> Attaching SeuratObject

# Wrap MAT in a Seurat object, with the UMAP/metadata table (`embed`) as the
# per-column metadata, keyed by `id`.
cell_meta <- data.frame(embed, row.names = embed$id)
seurat <- Seurat::CreateSeuratObject(counts = MAT, meta.data = cell_meta)
#### Select variable features ####
# Better to include more (perhaps all?) genes
seurat <- Seurat::FindVariableFeatures(seurat)
                                       # nfeatures=nrow(seurat))
gg_var <- Seurat::VariableFeaturePlot(seurat)
# Seurat::VariableFeatures(seurat) <- row.names(seurat)

# Normalise, then scale while regressing out `type` and `dataset`.
seurat <- Seurat::NormalizeData(seurat)
regress_vars <- c("type", "dataset") # genes
seurat <- Seurat::ScaleData(seurat, vars.to.regress = regress_vars)
#> Regressing out type, dataset
#> Centering and scaling data matrix

seurat <- Seurat::RunPCA(seurat) # slot="count"
#> PC_ 1 
#> Positive:  ASPSCR1, ANXA5, ADD1, ANO6, CYCS, CYP2C9, CYP2A6, IFNAR2, MC3R, KCNE3 
#>     ZNF644, CABP2, WBP2, GREM1, KCNN4, CNGB3, MSH3, CLCN5, IL6R, LCK 
#>     CACNA1S, CYP1B1, CDKN1A, DAXX, CDKN2C, CDKN1B, BLK, CD3E, ANGPTL6, CD3D 
#> Negative:  RERE, TRRAP, HUWE1, SETD5, HDAC8, NSUN2, KLHL7, SKI, SMC3, RPL10 
#>     RAB18, STAG1, SMC1A, ADAT3, RAD21, AUTS2, NIPBL, ARL3, RAB3GAP1, KCNAB2 
#>     TCF20, ZMIZ1, PRDM16, DHDDS, MED25, RLIM, MAPK8IP3, EXOSC9, RAB3GAP2, TRAPPC4 
#> PC_ 2 
#> Positive:  NXN, ROR2, WNT5A, DVL3, FANCL, FZD2, DVL1, UBE2T, CHST3, FLNB 
#>     FANCF, MAD2L2, FANCE, SLX4, FANCB, FANCG, RAD51C, BRIP1, FANCC, B3GAT3 
#>     FANCA, DHODH, SF3B4, FANCI, WNT7A, MEGF8, RPL26, RFWD3, WDR35, CUL7 
#> Negative:  UBA5, GABRA5, NUS1, GRIN2D, YWHAG, NECAP1, FGF12, EEF1A2, TRAK1, AP3B2 
#>     ZC3H14, TUSC3, MBOAT7, SCN3A, ST3GAL3, NDUFA9, GABRB2, NTRK2, KCNB1, NDUFV1 
#>     CACNA1B, CYFIP2, CLIP1, GABRA2, NDUFV2, SLC1A2, NDUFS8, KCNA2, NDUFS4, NDUFS7 
#> PC_ 3 
#> Positive:  CD81, ICOS, CTLA4, CR2, TNFRSF13C, CD19, NFKB1, FAS, IL12A, TNFRSF13B 
#>     MEFV, PRKCD, WAS, TCF3, MYD88, PSMB4, RAG1, NPM1, STAT4, RAG2 
#>     IRF2BP2, ZAP70, LRBA, CD79A, IKZF1, DNASE1L3, IL23R, IL7R, RASGRP1, LRRC8A 
#> Negative:  MBD5, PPP3CA, EEF1A2, SET, KDM5B, MAN1B1, MYT1L, HIVEP2, DCPS, GRIN2B 
#>     MED12L, CAMK2A, CIC, MAPK8IP3, TAOK1, RAB11A, DYNC1H1, ZC3H14, TUSC3, ACTL6B 
#>     STXBP1, CLIP1, CDH15, TRAPPC9, MBOAT7, GRIN1, UPF3B, CACNG2, SLC45A1, BRSK2 
#> PC_ 4 
#> Positive:  TOPORS, PRPF6, RP9, PRPF31, PDE6A, PDE6G, RP1L1, FSCN2, MAK, PRPF3 
#>     FAM161A, SPATA7, ZNF408, SLC7A14, IDH3A, AHR, MERTK, CRX, REEP6, NR2E3 
#>     ROM1, CLRN1, CA4, PDE6B, IMPDH1, USH2A, IMPG2, KIAA1549, RHO, DHX38 
#> Negative:  ICOS, TNFRSF13C, CR2, CD19, NFKB1, PSMB4, TCF3, TNFRSF13B, CD81, RAG1 
#>     RAG2, IL7R, ZAP70, CD79A, LRRC8A, PSMB9, IL2RG, CD79B, ADA, MYPN 
#>     CIITA, WDR1, CD247, CD3D, ELANE, CD3E, TPM3, CFL2, LRBA, KBTBD13 
#> PC_ 5 
#> Positive:  TPM3, ACTA1, NEB, KBTBD13, CFL2, GFPT1, AGRN, TTN, LMOD3, COL13A1 
#>     BIN1, MYO9A, ITGA7, CHAT, MYOT, SLC5A7, CHRNE, DOK7, SLC18A3, KY 
#>     SYT2, SNAP25, COL6A1, MYPN, CHRNB1, ANO5, COL6A2, COL12A1, DNM2, RYR1 
#> Negative:  KDM5B, DOCK8, GRIN2B, TAOK1, RAB11A, CIC, HIVEP2, ZC3H14, CDH15, TUSC3 
#>     BRSK2, CACNG2, CLIP1, TRPM3, KCNQ5, SET, KIRREL3, CAMK2A, TNIK, ST3GAL3 
#>     CRADD, EEF1A2, MBOAT7, SLC45A1, GRIN1, NDST1, FMN2, MED12L, DCPS, HNMT
# Seurat::ElbowPlot(seurat, ndims = 50)
seurat <- Seurat::FindNeighbors(seurat)
#> Computing nearest neighbor graph
#> Computing SNN
seurat <- Seurat::RunUMAP(seurat, dims=1:50) #metric="hamming")
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 01:51:57 UMAP embedding parameters a = 0.9922 b = 1.112
#> 01:51:57 Read 6811 rows and found 50 numeric columns
#> 01:51:57 Using Annoy for neighbor search, n_neighbors = 30
#> 01:51:57 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 01:51:59 Writing NN index file to temp file /var/folders/zq/h7mtybc533b1qzkys_ttgpth0000gn/T//RtmpM1lLtl/file12bd775acd5d4
#> 01:51:59 Searching Annoy index using 1 thread, search_k = 3000
#> 01:52:01 Annoy recall = 99.93%
#> 01:52:01 Commencing smooth kNN distance calibration using 1 thread
#> 01:52:02 151 smooth knn distance failures
#> 01:52:03 Initializing from normalized Laplacian + noise
#> 01:52:03 Commencing optimization for 500 epochs, with 289176 positive edges
#> 01:52:13 Optimization finished
# Cluster on the graph built by FindNeighbors(). `reduction` is not a
# FindClusters() argument (it was ignored with a warning), so it is dropped.
seurat <- Seurat::FindClusters(seurat)
#> Warning: The following arguments are not used: reduction
#> Warning: The following arguments are not used: reduction
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 6811
#> Number of edges: 193952
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8689
#> Number of communities: 25
#> Elapsed time: 0 seconds

Seurat::DimPlot(seurat, group.by = c("seurat_clusters","type"))

Term enrichment

TF-IDF

# Run the TF-IDF helper using the `label` and `seurat_clusters` metadata
# columns.
# NOTE(review): seurat_tfidf() is not defined in this file; presumably it comes
# from the sourced tfidf.R. Confirm.
seurat <- seurat_tfidf(
  seurat,
  label_var = "label",
  cluster_var = "seurat_clusters",
  force_new = TRUE
)
#> Joining, by = "word"
#> Joining, by = "cluster"
#> Joining, by = "cluster"
saveRDS(seurat, "DimReduction/seurat.rds")

word2vec

Follows the tutorial here.

library(word2vec) 

# Train a 15-dimensional word2vec model (20 iterations) on the `id` values,
# which are named by their cluster assignment, then extract the embedding.
cluster_named_ids <- setNames(
  seurat@meta.data$id,
  seurat@meta.data$seurat_clusters
)
model <- word2vec(x = cluster_named_ids, dim = 15, iter = 20)
embedding <- as.matrix(model)

library(uwot)
# Project the word2vec embedding with UMAP and collect the coordinates in a
# data.frame with columns word / x / y.
viz <- uwot::umap(embedding, n_neighbors = 15, n_threads = 10)
rownames(viz) <- rownames(embedding)
coords <- viz
colnames(coords) <- c("x", "y")
df <- tibble::rownames_to_column(data.frame(coords), "word")


# Labelled scatter plot of the word2vec UMAP coordinates.
ggplot(df, aes(x = x, y = y, label = word)) +
  geom_point() +
  ggrepel::geom_text_repel() +
  theme_void() +
  labs(title = "word2vec")
# Give every entry a top-level ancestor label: cell types get "Cell-type" and
# any entry still missing one gets "Other phenotype".
anc_label <- seurat$ancestor_label1
anc_label[seurat$type == "celltype"] <- "Cell-type"
anc_label[is.na(anc_label)] <- "Other phenotype"
seurat$ancestor_label1 <- anc_label

# Draw the UMAP annotated with TF-IDF terms, sized by `genes` and coloured by
# the top-level ancestor label.
# NOTE(review): umap_tfidf() is not defined in this file. The rendered output
# reports dataset, shape_var, species, Label and ancestor_label1 as unknown
# aesthetics, so those arguments appear to be forwarded to the plot. Confirm
# against the helper.
gg_tfidf <- umap_tfidf(
  seurat = seurat,
  size_var = "genes",
  shape = "type",
  # color_var = "dataset", 
  color_var = "ancestor_label1",
  dataset = "dataset",
  shape_var = "type",
  species = "species",
  Label = "label",
  ancestor_label1 = "ancestor_label1",
  density_palette = "Blues",
  show_plot = TRUE
)
#> Joining, by = "word"
#> Joining, by = "cluster"
#> Joining, by = "cluster"
#> `summarise()` has grouped output by 'cluster'. You can override using the `.groups` argument.
#> Warning: Ignoring unknown aesthetics: dataset, shape_var, species, Label,
#> ancestor_label1
#> Warning: Using shapes for an ordinal variable is not advised

plotly::ggplotly(gg_tfidf)
#> Warning: Using shapes for an ordinal variable is not advised
#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues

#> Warning in geom2trace.default(dots[[1L]][[25L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
#>   If you'd like to see this geom implemented,
#>   Please open an issue with your example code at
#>   https://github.com/ropensci/plotly/issues
# Save the `gg_tfidf` plot as a PDF under DimReduction/
# (width 13, height 8, in ggsave's default units).
ggsave(
  filename = "DimReduction/UMAP.tfidf.pdf",
  plot = gg_tfidf,
  width = 13,
  height = 8,
  dpi = 300
)
#> Warning: Using shapes for an ordinal variable is not advised

Session info

# Print the R version, platform, locale, and attached/loaded package versions
# for this run, so the analysis environment can be reproduced.
utils::sessionInfo()
#> R version 4.0.4 (2021-02-15)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] tidytext_0.3.0     SeuratObject_4.0.0 Seurat_4.0.0       Matrix.utils_0.9.8
#> [5] EWCE_0.99.2        ontologyIndex_2.7  Matrix_1.3-2       ggplot2_3.3.3     
#> [9] dplyr_1.0.5       
#> 
#> loaded via a namespace (and not attached):
#>   [1] utf8_1.2.1                  reticulate_1.18            
#>   [3] tidyselect_1.1.0            RSQLite_2.2.4              
#>   [5] AnnotationDbi_1.52.0        htmlwidgets_1.5.3          
#>   [7] grid_4.0.4                  Rtsne_0.15                 
#>   [9] munsell_0.5.0               codetools_0.2-18           
#>  [11] ica_1.0-2                   future_1.21.0              
#>  [13] miniUI_0.1.1.1              withr_2.4.1                
#>  [15] colorspace_2.0-0            Biobase_2.50.0             
#>  [17] highr_0.8                   knitr_1.31                 
#>  [19] rstudioapi_0.13             stats4_4.0.4               
#>  [21] SingleCellExperiment_1.12.0 ROCR_1.0-11                
#>  [23] tensor_1.5                  listenv_0.8.0              
#>  [25] MatrixGenerics_1.2.1        labeling_0.4.2             
#>  [27] GenomeInfoDbData_1.2.4      polyclip_1.10-0            
#>  [29] bit64_4.0.5                 farver_2.1.0               
#>  [31] rprojroot_2.0.2             parallelly_1.23.0          
#>  [33] vctrs_0.3.6                 generics_0.1.0             
#>  [35] xfun_0.22                   BiocFileCache_1.14.0       
#>  [37] R6_2.5.0                    GenomeInfoDb_1.26.4        
#>  [39] isoband_0.2.4               pals_1.6                   
#>  [41] bitops_1.0-6                spatstat.utils_2.0-0       
#>  [43] cachem_1.0.4                DelayedArray_0.16.3        
#>  [45] assertthat_0.2.1            promises_1.2.0.1           
#>  [47] scales_1.1.1                gtable_0.3.0               
#>  [49] globals_0.14.0              goftest_1.2-2              
#>  [51] rlang_0.4.10                splines_4.0.4              
#>  [53] lazyeval_0.2.2              dichromat_2.0-0            
#>  [55] yaml_2.2.1                  reshape2_1.4.4             
#>  [57] abind_1.4-5                 crosstalk_1.1.1            
#>  [59] httpuv_1.5.5                tokenizers_0.2.1           
#>  [61] tools_4.0.4                 ellipsis_0.3.1             
#>  [63] jquerylib_0.1.3             RColorBrewer_1.1-2         
#>  [65] ggdendro_0.1.22             BiocGenerics_0.36.0        
#>  [67] ggridges_0.5.3              Rcpp_1.0.6                 
#>  [69] plyr_1.8.6                  progress_1.2.2             
#>  [71] zlibbioc_1.36.0             purrr_0.3.4                
#>  [73] RCurl_1.98-1.3              prettyunits_1.1.1          
#>  [75] rpart_4.1-15                openssl_1.4.3              
#>  [77] deldir_0.2-10               pbapply_1.4-3              
#>  [79] cowplot_1.1.1               S4Vectors_0.28.1           
#>  [81] zoo_1.8-9                   SummarizedExperiment_1.20.0
#>  [83] grr_0.9.5                   ggrepel_0.9.1              
#>  [85] cluster_2.1.1               here_1.0.1                 
#>  [87] magrittr_2.0.1              data.table_1.13.6          
#>  [89] RSpectra_0.16-0             scattermore_0.7            
#>  [91] lmtest_0.9-38               RANN_2.6.1                 
#>  [93] SnowballC_0.7.0             fitdistrplus_1.1-3         
#>  [95] matrixStats_0.58.0          hms_1.0.0                  
#>  [97] patchwork_1.1.1             mime_0.10                  
#>  [99] evaluate_0.14               xtable_1.8-4               
#> [101] XML_3.99-0.6                IRanges_2.24.1             
#> [103] gridExtra_2.3               compiler_4.0.4             
#> [105] biomaRt_2.46.3              tibble_3.1.0               
#> [107] maps_3.3.0                  KernSmooth_2.23-18         
#> [109] crayon_1.4.1                htmltools_0.5.1.1          
#> [111] mgcv_1.8-34                 later_1.1.0.1              
#> [113] tidyr_1.1.3                 DBI_1.1.1                  
#> [115] dbplyr_2.1.0                MASS_7.3-53.1              
#> [117] rappdirs_0.3.3              cli_2.3.1                  
#> [119] parallel_4.0.4              igraph_1.2.6               
#> [121] GenomicRanges_1.42.0        pkgconfig_2.0.3            
#> [123] plotly_4.9.3                xml2_1.3.2                 
#> [125] bslib_0.2.4                 XVector_0.30.0             
#> [127] janeaustenr_0.1.5           stringr_1.4.0              
#> [129] digest_0.6.27               sctransform_0.3.2          
#> [131] RcppAnnoy_0.0.18            spatstat.data_2.0-0        
#> [133] rmarkdown_2.7               HGNChelper_0.8.1           
#> [135] leiden_0.3.7                uwot_0.1.10                
#> [137] curl_4.3                    shiny_1.6.0                
#> [139] lifecycle_1.0.0             nlme_3.1-152               
#> [141] jsonlite_1.7.2              mapproj_1.2.7              
#> [143] viridisLite_0.3.0           askpass_1.1                
#> [145] limma_3.46.0                fansi_0.4.2                
#> [147] pillar_1.5.1                lattice_0.20-41            
#> [149] homologene_1.4.68.19.3.27   fastmap_1.1.0              
#> [151] httr_1.4.2                  survival_3.2-7             
#> [153] glue_1.4.2                  RNOmni_1.0.0               
#> [155] spatstat_1.64-1             png_0.1-7                  
#> [157] bit_4.0.4                   stringi_1.5.3              
#> [159] sass_0.3.1                  blob_1.2.1                 
#> [161] memoise_2.0.0               irlba_2.3.3                
#> [163] future.apply_1.7.0